# Load the weather dataset and derive two categorical helper columns.
data(weather)

# wind_direction: 8-point compass sector derived from `wind_dir` (the 45-wide
#   sectors centred on multiples of 45 imply degrees). case_when() takes the
#   first matching branch, so each branch only has to test the upper edge of
#   its sector; values at or beyond 337.5 wrap back round to north.
# season: season label looked up from the month number; a month outside
#   1..12 matches no branch and yields NA.
weather <- weather %>%
  mutate(
    wind_direction = case_when(
      is.na(wind_dir) ~ NA_character_,
      wind_dir < 22.5 ~ "N",
      wind_dir < 67.5 ~ "NE",
      wind_dir < 112.5 ~ "E",
      wind_dir < 157.5 ~ "SE",
      wind_dir < 202.5 ~ "S",
      wind_dir < 247.5 ~ "SW",
      wind_dir < 292.5 ~ "W",
      wind_dir < 337.5 ~ "NW",
      wind_dir >= 337.5 ~ "N"
    ),
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% 3:5 ~ "Spring",
      month %in% 6:8 ~ "Summer",
      month %in% 9:11 ~ "Fall"
    )
  )
Part I
Q1: Suspicious Records
# Show the records with implausible readings: temperature above 100 or
# below -20, or wind speed above 50. Only the columns needed to identify and
# judge each record are kept.
weather %>%
  select(month, day, hour, temp, wind_speed) %>%
  filter(wind_speed > 50 | temp < -20 | temp > 100)
## # A tibble: 3 × 5
## month day hour temp wind_speed
## <int> <int> <int> <dbl> <dbl>
## 1 2 12 3 39.0 1048.
## 2 7 18 15 100. 9.21
## 3 7 19 16 100. 20.7
# 50-bin histograms of temperature and wind speed, for spotting outliers.
ggplot(weather) +
  geom_histogram(aes(x = temp), bins = 50) +
  labs(title = "Temperature Distribution")

ggplot(weather) +
  geom_histogram(aes(x = wind_speed), bins = 50) +
  labs(title = "Wind Speed Distribution")

# Replace implausible readings with NA: temperature above 100 or below -20,
# and wind speed above 50. which() drops NA comparisons, so readings that are
# already missing simply stay missing.
weather <- weather %>%
  mutate(
    temp = replace(temp, which(temp > 100 | temp < -20), NA),
    wind_speed = replace(wind_speed, which(wind_speed > 50), NA)
  )
Q2: Wind Direction
# Count records per compass sector; NA gets its own cell when any are present.
table(weather[["wind_direction"]], useNA = "ifany")
##
## E N NE NW S SE SW W <NA>
## 1383 4163 2459 4432 3908 1331 3822 4157 460
# Bar chart of how often each compass sector occurs, ignoring records with no
# recorded direction.
weather %>%
  filter(!is.na(wind_direction)) %>%
  ggplot(aes(x = wind_direction)) +
  geom_bar() +
  labs(title = "Wind Direction Frequency") +
  theme_minimal()

Q3: Wind Direction by Season
# Side-by-side (dodged) bars: one cluster per season, one bar per compass
# sector. Records without a wind direction are left out.
weather %>%
  filter(!is.na(wind_direction)) %>%
  ggplot(aes(x = season, fill = wind_direction)) +
  geom_bar(position = "dodge") +
  theme_minimal()

Q4: Temperature Distribution
# Compare the empirical temperature distribution with a normal distribution.
# The histogram is drawn on the density scale so that it shares a y-axis with
# the two overlaid curves:
#   red  - normal density using the sample mean and sd of temp (NAs ignored)
#   blue - kernel density estimate of temp
ggplot(weather, aes(x = temp)) +
  # after_stat(density) replaces the `..density..` dot-dot notation, which is
  # deprecated since ggplot2 3.4.0 and emits a warning.
  geom_histogram(aes(y = after_stat(density)), bins = 50) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(weather$temp, na.rm = TRUE),
      sd = sd(weather$temp, na.rm = TRUE)
    ),
    color = "red"
  ) +
  geom_density(color = "blue") +
  theme_minimal()

# Q-Q plot of temperature with its reference line (default theoretical
# distribution of the qq layers).
ggplot(weather, aes(sample = temp)) +
  geom_qq() +
  geom_qq_line() +
  labs(title = "QQ Plot")

Q5: Temperature by Season
# Temperature box plot per season, annotated with pairwise comparisons for
# Summer vs Winter and Spring vs Fall.
ggplot(data = weather, mapping = aes(x = season, y = temp)) +
  geom_boxplot() +
  stat_compare_means(
    comparisons = list(
      c("Summer", "Winter"),
      c("Spring", "Fall")
    )
  )

Part II
# Collapse the records to one row per (month, day): mean / min / max
# temperature and mean wind speed (NAs ignored), plus the day's season and a
# Date column for time-series plots.
daily <- weather %>%
  group_by(month, day) %>%
  summarise(
    mean_temp = mean(temp, na.rm = TRUE),
    min_temp = min(temp, na.rm = TRUE),
    max_temp = max(temp, na.rm = TRUE),
    mean_wind = mean(wind_speed, na.rm = TRUE),
    season = first(season),
    # The year is hard-coded to 2013; month and day are constant within a
    # group, so the first value of each identifies the day.
    date = as.Date(sprintf("2013-%d-%d", first(month), first(day))),
    .groups = "drop"
  )
Q6: Max vs Min Temperature
# Daily maximum against daily minimum temperature, points coloured by season.
# group = 1 overrides the per-season grouping so that one black linear fit is
# drawn through all days.
ggplot(data = daily, mapping = aes(x = min_temp, y = max_temp, color = season)) +
  geom_point() +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    color = "black",
    se = FALSE
  )

Q7: Wind Speed vs Temperature
# Daily mean wind speed against daily mean temperature. The colour mapping
# also groups the smoother, giving one trend curve (without a confidence
# band) per season.
ggplot(data = daily, mapping = aes(x = mean_temp, y = mean_wind, color = season)) +
  geom_point() +
  geom_smooth(se = FALSE)

Q8: Ratio vs Difference
# Two ways of expressing the daily temperature spread: the max/min ratio and
# the max - min difference.
daily <- daily %>%
  mutate(
    ratio = max_temp / min_temp,
    diff = max_temp - min_temp
  )

# Left panel: box plot of the ratio per season.
p1 <- ggplot(daily) +
  geom_boxplot(aes(x = season, y = ratio))

# Right panel: 30-bin histogram of the difference, one facet per season
# arranged in two rows.
p2 <- ggplot(daily) +
  geom_histogram(aes(x = diff), bins = 30) +
  facet_wrap(vars(season), nrow = 2)

# Place the two panels side by side.
ggarrange(plotlist = list(p1, p2), ncol = 2)

Q9: Line Plot
# Daily mean, minimum and maximum temperature over time. The constant strings
# mapped to `color` become the legend entries for the three lines.
p <- ggplot(data = daily, mapping = aes(x = date)) +
  geom_line(mapping = aes(y = mean_temp, color = "Mean")) +
  geom_line(mapping = aes(y = min_temp, color = "Min")) +
  geom_line(mapping = aes(y = max_temp, color = "Max")) +
  theme_minimal()

# Interactive version of the plot.
ggplotly(p)

# Animation: reveal the lines progressively along the date axis.
p_anim <- p + transition_reveal(along = date)
animate(plot = p_anim)

Q10: Polar Area Chart
# Lowest and highest temperature observed in each month (NAs ignored).
monthly <- weather %>%
  group_by(month) %>%
  summarise(
    min_temp = min(temp, na.rm = TRUE),
    max_temp = max(temp, na.rm = TRUE),
    .groups = "drop"
  )

# Polar chart with one wedge per month: a light bar up to the monthly minimum,
# and on top of it a dark bar whose height is the max - min range.
# NOTE(review): the dark bars are lifted by nudging with the raw
# monthly min_temp vector, which presumably relies on the layer's rows being
# in the same order as `monthly` - confirm if the data are ever re-ordered.
ggplot(monthly, aes(x = factor(month))) +
  geom_col(aes(y = min_temp), fill = "lightblue") +
  geom_col(
    aes(y = max_temp - min_temp),
    position = position_nudge(y = monthly[["min_temp"]]),
    fill = "darkblue"
  ) +
  coord_polar() +
  theme_minimal()
